#function to calc percentiles
perc.rank <- function(x) {
  y<-rank(x)/length(x)
  return(y)}

data<-data_student

#get admission score percentiles
data$entrance_perc_town<-NA
data[!is.na(data$media_la_admitere),]<-data[!is.na(data$media_la_admitere),] %>%
  group_by(an,Cod_SIRUTA2_hs_adm) %>%
  mutate(entrance_perc_town=perc.rank(media_la_admitere)) %>%
  ungroup

data<-data %>% mutate(dec_town=cut(entrance_perc_town, breaks = c(-Inf, 0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9, Inf), 
                                                   labels = c('1','2','3','4','5','6','7','8','9','10'), right = FALSE))


#clean
data_transform<-data %>%
  dplyr::select(judet_bac,judet_adm,Cod_SIIIR_hs_adm,unitate_de_invatamant,liceu_repartizat,Cod_SIRUTA2_hs_adm,entrance_perc,dec_town,grad_perc,an_bac,
         an,specializare_adm,specializare_lb,scoala_de_provenienta,media_la_admitere,entrance_perc_town) %>%
  mutate(p=trunc(entrance_perc*100),
         p_exam=trunc(entrance_perc*100),
         v=cut(entrance_perc,c(-Inf,seq(from=0.05,to=0.95,by=0.05),Inf),labels=1:20)) %>%
  group_by(an_bac,Cod_SIRUTA2_hs_adm) %>%
  mutate(n_hs=n_distinct(Cod_SIIIR_hs_adm[!is.na(Cod_SIIIR_hs_adm)])) %>%
  ungroup() %>%
  filter(n_hs>0 & !is.na(dec_town)) %>%
  mutate(n_hs_town_group=cut(n_hs,c(-Inf,1,2,3,15,Inf),labels=c("1","2","3","4-15","16+"))) %>%
  mutate(n_graph=case_when(
    n_hs_town_group=='1' ~ 0,
    n_hs_town_group=='2' ~ 1,
    n_hs_town_group=='3' ~ 2,
    n_hs_town_group=='4-15' ~ 3,
    n_hs_town_group=='16+' ~ 4
  )) %>%
  mutate(n_graph=n_graph/4*0.5-2/4*0.5+as.numeric(dec_town)) 

data_transform<-data_transform %>%
  group_by(an,scoala_de_provenienta) %>%
  mutate(n_st_ms=n(),peer_entrance_ms=(sum(entrance_perc)-entrance_perc)/(n_st_ms-1)) %>%
  group_by(an,liceu_repartizat) %>%
  mutate(n_st_hs=n(),peer_entrance_hs=(sum(entrance_perc)-entrance_perc)/(n_st_hs-1)) %>%
  group_by(an,liceu_repartizat,specializare_adm,specializare_lb) %>%
  mutate(n_st_hs_track=n(),peer_entrance_hs_track=(sum(entrance_perc)-entrance_perc)/(n_st_hs_track-1))



#means by entrance dec_town for different types of peers
summary_ms<-data_transform %>%
  filter(liceu_repartizat!='') %>%
  group_by(n_hs_town_group,n_graph,dec_town) %>%
  summarize(mean=mean(peer_entrance_ms,na.rm=T),
            sd=sd(peer_entrance_ms,na.rm=T)) %>%
  ungroup %>%
  mutate(Peers="Middle School")
summary_hs<-data_transform %>%
  filter(liceu_repartizat!='') %>%
  group_by(n_hs_town_group,n_graph,dec_town) %>%
  summarize(mean=mean(peer_entrance_hs,na.rm=T),
            sd=sd(peer_entrance_hs,na.rm=T)) %>%
  ungroup %>%
  mutate(Peers="High School")
summary_hs_track<-data_transform %>%
  filter(liceu_repartizat!='') %>%
  group_by(n_hs_town_group,n_graph,dec_town) %>%
  summarize(mean=mean(peer_entrance_hs_track,na.rm=T),
            sd=sd(peer_entrance_hs_track,na.rm=T)) %>%
  ungroup %>%
  mutate(Peers="High School Track")
summary<-bind_rows(summary_hs_track,summary_ms,summary_hs) %>%
  mutate(Peers=factor(Peers, levels = c("Middle School", "High School", "High School Track")))

#graph
g<-ggplot(summary)+
  facet_wrap(~ Peers)+
  geom_point(aes(x=dec_town,y=mean*100,color=n_hs_town_group),size=3)+
  geom_line(aes(x=dec_town,y=mean*100,color=n_hs_town_group,group=n_hs_town_group))+
  xlab("Own Admission Score Decile (Town-Cohort)")+
  ylab("Peer Admission Score Percentile (National-Cohort)")+
  scale_x_discrete(breaks=0:10)+
  scale_y_continuous(breaks=seq(from = 0, to = 100, by = 10),limits=c(0,100))+
  labs(color=str_wrap("Number of High Schools in Town",width=10))+
  theme(axis.text.x=element_text(size=12),
        axis.text.y = element_text(size=12),  
        axis.title.x = element_text(size=14),
        axis.title.y = element_text(size=14),
        legend.title.align=0.5,
        strip.text.x = element_text(size = 18),
        legend.title=element_text(size=18),
        legend.text=element_text(size=18))
g


#differences in means (used in text):
#middle school 
summary_ms %>% filter(n_hs_town_group==1 & dec_town==10) %>% dplyr::select(mean)-
summary_ms %>% filter(n_hs_town_group==1 & dec_town==1) %>% dplyr::select(mean)

summary_ms %>% filter(n_hs_town_group=='16+' & dec_town==10) %>% dplyr::select(mean)-
  summary_ms %>% filter(n_hs_town_group=='16+' & dec_town==1) %>% dplyr::select(mean)

#hs
summary_hs %>% filter(n_hs_town_group==1 & dec_town==10) %>% dplyr::select(mean)-
  summary_hs %>% filter(n_hs_town_group==1 & dec_town==1) %>% dplyr::select(mean)

summary_hs %>% filter(n_hs_town_group=='16+' & dec_town==10) %>% dplyr::select(mean)-
  summary_hs %>% filter(n_hs_town_group=='16+' & dec_town==1) %>% dplyr::select(mean)


#compute gaps
(summary %>% filter(n_hs_town_group==1 & dec_town==1) %>% dplyr::select(mean)-
summary %>% filter(n_hs_town_group=='16+' & dec_town==1) %>% dplyr::select(mean) )*100

(summary %>% filter(n_hs_town_group==1 & dec_town==1) %>% dplyr::select(mean)-
    summary %>% filter(n_hs_town_group=='2' & dec_town==1) %>% dplyr::select(mean) )*100

(summary %>% filter(n_hs_town_group==1 & dec_town==1) %>% dplyr::select(mean)-
    summary %>% filter(n_hs_town_group=='16+' & dec_town==1) %>% dplyr::select(mean)) *100

#10th decile
(summary %>% filter(n_hs_town_group==1 & dec_town==10) %>% dplyr::select(mean)-
  summary %>% filter(n_hs_town_group=='16+' & dec_town==10) %>% dplyr::select(mean) )*100

(summary %>% filter(n_hs_town_group=='2' & dec_town==10) %>% dplyr::select(mean)-
    summary %>% filter(n_hs_town_group=='1' & dec_town==10) %>% dplyr::select(mean) )*100


(summary %>% filter(n_hs_town_group==1 & dec_town==10) %>% dplyr::select(mean)-
    summary %>% filter(n_hs_town_group=='16+' & dec_town==10) %>% dplyr::select(mean) )*100



#save graph
current_path<-rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))

pdf("03_figure_01_color.pdf" , width =10  , height = 5) 
g
dev.off()

pdf("03_figure_01.pdf" , width =10  , height = 5) 
g + scale_color_grey()+ scale_fill_grey()
dev.off()


